
* Importing raw data ===========================================================

* 2017 wave: the base dataset that all earlier years get appended to ===========
* Two raw SAS files are read for 2017: mreg_g2017 and mreg_org_g2017.
* NOTE(review): the original header said "private households only", yet the
* org file is appended as well - presumably mreg_g holds privately owned cars
* and mreg_org_g firm-owned cars; confirm against the data documentation.

* Stage the org file in a temporary dataset first ...
import sas using ${rawdata}mreg_org_g2017, clear
gen year = 2017
tempfile org2017
save `org2017', replace

* ... then load the main file, which becomes the master data in memory
import sas using ${rawdata}mreg_g2017, clear
gen year = 2017 // the "dataset year", i.e. which yearly file a row came from
append using `org2017'

* Only these variables are carried forward
local keepvars w17_1455_lnr w17_1455_orgnr w17_1455_regnr ///
	sp_skilt rutine kj_art freg_arh freg_aar reg_aarh reg_dato ///
	kjt_grup typekode drivstof avr_aarh avreg_da stjaalet utfoert ///
	vrak_dato freg_dato bruktimp year
keep `keepvars'

* Remaining years (2016 back to 2005): import each raw file and append it ======
* For every year both the "g" and the "org_g" file are read. Nothing is
* de-duplicated inside this loop; rows are only stacked onto the data in memory.

* Columns that exist in some of the yearly files only and are not wanted
local unwanted reg_dist merkekod modell type aksler motor_kw ///
	lengde bredde tot_vekt eg_vekt nyttelas ak_tr_b sittepl ///
	avr_dist part_utsl nox_utsl driv_forb avgassk co2_utsl ///
	eu_kar_tp p_filter slagvolum motor_khw hk co2 eu_ktp

* One scratch file is enough: it is overwritten on every pass
tempfile batch

forvalues year = 2016(-1)2005 {
	foreach filename in g org_g {
		preserve // park the accumulated data while the raw file is read
			di "Importing file mreg_`filename'"  `year'
			import sas using ${rawdata}mreg_`filename'`year', clear
			* -capture- because a given file may lack any of these columns
			foreach v of local unwanted {
				capture drop `v'
			}
			gen year = `year'
			save `batch', replace
		restore // accumulated data back in memory

		di "Appending to original dataset."
		append using `batch'
	}
}
	
compress

* Short identifier names: w17_1455_regnr becomes carid, w17_1455_lnr ownerid
rename (w17_1455_regnr w17_1455_lnr) (carid ownerid)

* Rows with an empty ownerid take the value of w17_1455_orgnr instead
* (per the original author: these are the cars owned by firms)
replace ownerid = w17_1455_orgnr if missing(ownerid)
drop w17_1455_orgnr

* carid is placed immediately before ownerid
order carid, before(ownerid)

* Registration date ============================================================
* reg_aarh and reg_dato are glued into one string and parsed as year-month-day.
* When that string ends in "00" (day component), characters 7 onwards are
* replaced by "01" before parsing, i.e. the date is set to the 1st of the month.
tempvar stamp
gen `stamp' = reg_aarh + reg_dato
gen reg_DMY = date(cond(substr(`stamp', -2, .) == "00", ///
	substr(`stamp', 1, 6) + "01", `stamp'), "YMD")
format reg_DMY %td
drop `stamp' reg_aarh reg_dato

* First time registration ======================================================
* Year of first registration: freg_arh and freg_aar are concatenated and then
* converted to a number.
gen freg_Y = freg_arh + freg_aar
destring freg_Y, replace
drop freg_arh freg_aar
* Most cars have date for first time registration, some older cars only have year
gen freg_DMY = date(freg_dato, "YMD")
format freg_DMY %td
drop freg_dato

* === Freg year ===
* Every row of a car gets the earliest first-registration year recorded for
* that car. Rows where the year is missing are filled too, regardless of how
* many distinct years the car has, so that the year is constant within car
* (the same way the date is treated just below).
bys carid: egen aux = min(freg_Y)
replace freg_Y = aux if aux != .
drop aux

* === Freg date ===
* Setting first registration date to the first date registered on each car
bys carid: egen aux = min(freg_DMY)
replace freg_DMY = aux
drop aux

/* Adding first time registration date as jan 1st for the
years where the date is missing.
freg_date flags whether the date was actually recorded (1) or is
filled in by the two fallbacks below (0). */
gen freg_date = (freg_DMY != .)
replace freg_DMY = mdy(1,1,freg_Y) if freg_DMY == .
* Last resort when neither a date nor a year is known: use the registration date
replace freg_DMY = reg_DMY if freg_DMY == .
drop freg_Y
compress

* Deregistration date
* avr_aarh and avreg_da together form one year-month-day string.
* NOTE(review): unlike the registration date, a "00" day is not patched here,
* so such strings presumably parse to missing - confirm that this is intended.
gen avreg_DMY = date(avr_aarh + avreg_da, "YMD")

* Scrap date
gen scrap_DMY = date(vrak_dato, "YMD")

* Both are shown as daily dates; the raw string parts are no longer needed
format avreg_DMY scrap_DMY %td
drop avr_aarh avreg_da vrak_dato

* Turn the remaining code variables into numeric variables
local codevars sp_skilt rutine kj_art kjt_grup typekode drivstof ///
	stjaalet utfoert bruktimp
destring `codevars', replace
	
* ==============================================================================
* Fuel type ====================================================================
* drivstof can differ between the rows of one car. Three rules are applied in
* turn to build the variable fuel; drivstof itself is dropped at the end.
* year_min and year_max (first/last dataset year per car) are created here and
* kept in the data.

* Distinct fuel types per car id
egen tag = tag(carid drivstof)
egen distinct = total(tag), by(carid)

* Rule one: if one of multiple fuel types is 0, drop it (set it to missing)
gen fuel = drivstof
replace fuel = . if fuel == 0 & distinct > 1
drop distinct tag

* Recount the distinct fuel types per car after rule one
egen tag = tag(carid fuel)
egen distinct = total(tag), by(carid)

* Rule two: if a car with multiple fuel types has code 7 or 8 among them, the
* rows carrying any other code take over that value.
* NOTE(review): the original comment said "6 or 7" while the code tests for
* 7 and 8 - confirm which codes are meant.
gen aux = drivstof if distinct > 1 & (drivstof == 7 | drivstof == 8)
bys carid: egen aux2 = mean(aux)
* Rows already coded 7 or 8 are left untouched. Both inequalities have to be
* joined with "&": "drivstof != 7 | drivstof != 8" holds for every row and
* would overwrite the 7/8 rows as well.
replace fuel = aux2 if aux2 != . & drivstof != 7 & drivstof != 8
drop distinct tag aux aux2

* A car that has both 7 and 8 yields a mean strictly between the two; rows
* that received such a value are set to 7
replace fuel = 7 if fuel > 7 & fuel < 8

* Rule three: if #of fuel types is still >1, keep the most recent
bys carid: egen year_min = min(year)
bys carid: egen year_max = max(year)

* Fuel as recorded in the car's latest dataset year, spread to all its rows.
* NOTE(review): if the rows of the latest year disagree, the mean is not a
* valid fuel code - check whether this occurs in the data.
gen aux = fuel if year == year_max
bys carid: egen aux2 = mean(aux)
replace fuel = aux2 if aux2 != fuel & aux2 != .

drop aux aux2 drivstof

* === Used car import ==========================================================
* bruktimp is made constant within car.
* Step 1: if the car's mean is neither 0 nor 1 (its rows disagree), every row
* of the car is set to 1.
bys carid: egen aux = mean(bruktimp)
replace bruktimp = 1 if aux != 0 & aux != 1 & aux != .
drop aux
* Step 2: rows that are still missing take the car's (now uniform) value
bys carid: egen aux = mean(bruktimp)
replace bruktimp = aux if bruktimp == .
drop aux

order freg_DMY reg_DMY avreg_DMY scrap_DMY year_min year_max fuel, b(sp_skilt)

compress
// Dropping duplicates: one row per car and dataset year.
// Stata orders ties within a by-group randomly, so without a tie-break the
// surviving row could change from run to run. Sorting on every other variable
// as well makes the choice reproducible.
// NOTE(review): which row survives is still arbitrary in substance (first in
// sort order) - consider a meaningful rule, e.g. based on reg_DMY.
quietly ds carid year, not
bys carid year (`r(varlist)'): keep if _n == 1
save "${newdata}lnr_regnr_year.dta", replace
* ==============================================================================
